Take Home Exercise 3

VAST Challenge 3

Huang Anni (Singapore Management University)
05-07-2022

The task

With reference to Challenge 3 of VAST Challenge 2022, you are required to reveal the economic of the city of Engagement, Ohio USA by using appropriate static and interactive statistical graphics methods

Introduction

This exercise requires us to apply the skills you had learned in Lesson 1 and Hands-on Exercise 1 to reveal the demographic of the city of Engagement, Ohio USA by using appropriate static statistical graphics methods. The data should be processed by using appropriate tidyverse family of packages and the statistical graphics must be prepared using ggplot2 and its extensions. image

y <- as.POSIXct(financial$timestamp, format="%Y-%m-%d %H:%M:%S")
financial$year <- format(y, format="%Y")
financial$month <- format(y, format="%m")

income <- financial %>%
  filter(category %in% c('Wage', 'RentAdjustment')) %>%
  group_by(year, month) %>%
  summarise(income = mean(amount))

outcome <- financial %>%
  filter(!category %in% c('Wage', 'RentAdjustment')) %>%
  group_by(year, month) %>%
  summarise(outcome = mean(abs(amount)))
total <- merge(income,outcome,by=c("year","month"))
total$date <- paste(total$year, total$month, sep='-')
total$coef <- total$outcome / total$income
unique(total$date)
 [1] "2022-03" "2022-04" "2022-05" "2022-06" "2022-07" "2022-08"
 [7] "2022-09" "2022-10" "2022-11" "2022-12" "2023-01" "2023-02"
[13] "2023-03" "2023-04" "2023-05"
library(rmarkdown)
paged_table(financial, options = list(rows.print = 15, cols.print = 5))
total$coef <- total$outcome/total$income
fig <- plot_ly(total, x = ~date, y = ~coef, type = 'scatter',mode = 'lines+markers')
fig <- fig %>% layout(title = 'Fig 1.Residence\'s living status through time',
                      xaxis = list(title = "Date"),
                      yaxis = list (title = "Spend/Income"))
fig
total$remain <- (total$income - total$outcome)
total$remain <- round(total$remain, 1)
ggplot(data=total, aes(x=date, y=remain)) +
  geom_bar(stat = "identity", width = 0.5, fill="steelblue") +
  labs(y= 'Fig2. Total Deposit\n(Dollar)', x= 'Date',
       title = "Trend of Living Standards",
       subtitle = "Highest remaining in 2022-03") +
  geom_text(aes(label = remain), vjust = -1.5, colour = "white") +
  theme(axis.title.y= element_text(angle=90),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.ticks.x= element_blank(),
        panel.background= element_blank(), 
        axis.line= element_line(color= 'grey'), 
        panel.grid.major.y = element_line(color = "grey",size = 0.5,linetype = 2))

total$remain <- (total$income - total$outcome)
total$remain <- round(total$remain, 1)
wage <- financial %>%
  filter(category == 'Wage') %>%
  group_by(participantId) %>%
  summarise(wage = mean(amount))
brks <- c(0, 100, 200, 300, 400, Inf)
grps <- c('<=100', '101-200', '201-300', '301-400', '>400')
wage$Wage_Group <- cut(wage$wage, breaks=brks, labels = grps, right = FALSE)
ggplot(data=total, aes(x=date, y=remain)) +
  geom_bar(stat = "identity", width = 0.5, fill="steelblue") +
  coord_cartesian(ylim = c(0, 160)) + 
  labs(y= 'Total Deposit', x= 'Date',
       title = "Fig2. Trend of Living Standards",
       subtitle = "Highest remaining in 2022-03") +
  geom_text(aes(label = remain), vjust = -1, colour = "black") +
  theme(axis.title.y= element_text(angle=90),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.ticks.x= element_blank(),
        panel.background= element_blank(), 
        axis.line= element_line(color= 'grey'), 
        panel.grid.major.y = element_line(color = "grey",size = 0.5,linetype = 2))

wage <- financial %>%
  filter(category == "Wage") %>%
  group_by(participantId) %>%
  summarise(wage = mean(amount))
brks <- c(0, 100, 200, 300, 400, Inf)
grps <- c('<=100', '101-200', '201-300', '301-400', '>400')
wage$Wage_Group <- cut(wage$wage, breaks=brks, labels = grps, right = FALSE)
unique(wage$Wage_Group)
[1] 201-300 301-400 101-200 <=100   >400   
Levels: <=100 101-200 201-300 301-400 >400
p <- ggplot(data=wage, aes(x=wage, fill=Wage_Group, y=Wage_Group)) +
    geom_histogram(position="dodge",aes(y = ..density..), binwidth=density(wage$wage)$bw) +
    geom_density(fill="red", alpha = 0.2)+
  labs(y= 'Density', x= 'Wage',
       title = "Fig3: Wage Distribution",
       subtitle = "Most people get 50 per month")

ggplotly(p)
#fig <- plot_ly(wage, x = ~wage, fill = ~Wage_Group ,type = "histogram")

#fig
income <- financial %>%
  filter(category %in% c('Wage', 'RentAdjustment')) %>%
  group_by(participantId) %>%
  summarise(income = sum(amount))

outcome <- financial %>%
  filter(!category %in% c('Wage', 'RentAdjustment')) %>%
  group_by(participantId) %>%
  summarise(outcome = sum(abs(amount)))

comparison <- merge(income, outcome, by='participantId') %>%
  merge(wage, by='participantId')
comparison$ratio <- comparison$outcome / comparison$income

p <- ggplot(comparison, aes(x = ratio, y = Wage_Group)) +
  geom_density_ridges(calc_ecdf = TRUE,
                      quantiles = 4, 
                      quantile_lines = TRUE,
                      alpha = .2) +
  labs(y= 'Wage Group', x= 'Ratio in wage',
       title = "Fig3: Wage Distribution",
       subtitle = "People with low wages tend to spend most of their money")+
  theme_ridges() + 
  scale_fill_viridis_d(name = "Quartiles")+
  ggtitle("Fig3: Wage Distribution")+
  theme(plot.title = element_text(size = 12), 
        legend.position = "top")
p

education <- financial %>%
  filter(category == "Education") %>%
  group_by(participantId) %>%
  summarise(education_spend = mean(amount))

brks <- c(0, 100, 200, 300, 400, Inf)
grps <- c('<=100', '101-200', '201-300', '301-400', '>400')
wage$Wage_Group <- cut(wage$wage, breaks=brks, labels = grps, right = FALSE)
unique(wage$Wage_Group)
[1] 201-300 301-400 101-200 <=100   >400   
Levels: <=100 101-200 201-300 301-400 >400
p <- ggplot(data=wage, aes(x=wage, fill=Wage_Group, y=Wage_Group)) +
    geom_histogram(position="dodge",aes(y = ..density..), binwidth=density(wage$wage)$bw) +
  labs(y= 'Density', x= 'Wage',
       title = "Fig3: Wage Distribution",
       subtitle = "Most people get 50 per month")

ggplotly(p)
outcome_different_cats <- financial %>%
  filter(!category %in% c('Wage', 'RentAdjustment')) %>%
  group_by(participantId, category) %>%
  summarise(outcome = mean(abs(amount))) %>%
  merge(wage, by='participantId')
outcome_different_cats$ratio <- outcome_different_cats$outcome / outcome_different_cats$wage

p <- ggplot(data=outcome_different_cats, aes(x= ratio)) + 
  geom_density() +
  facet_grid(Wage_Group ~ category)
ggplotly(p)
participant_data <- read_csv('./data/Participants.csv')
print(length(participant_data$participantId))
[1] 1011
participant_finance <- merge(participant_data, wage,by=c("participantId"))
# participant_finance
fig <- plot_ly(data = financial, 
               x = ~participant_finance$age, 
               y = ~participant_finance$wage,
               color = ~participant_finance$educationLevel)
# Divide by levels of "sex", in the vertical direction
# fig <- fig + facet_grid(educationLevel ~ .)

fig <- ggplotly(p)

fig